#!/usr/bin/env bash

# Show status of Slurm job(s).
# Both queue information and accounting information are printed.
# Homepage: https://github.com/OleHolmNielsen/Slurm_tools/

# Check number of arguments
if (( $# != 1 ))
then
	echo "Usage: showjob <jobid>"
	exit 1
fi
# Check that jobid is a number
if ! [[ $1 =~ ^[0-9]+$ ]]
then
	echo "ERROR: <jobid> must be a number"
	echo "Usage: showjob <jobid>"
	exit 1
fi

jobid=$1
# Scratch file used later in this script to hold scontrol output.
# mktemp creates the file atomically under an unpredictable name, so another
# user cannot pre-create or symlink the path in the world-writable /tmp.
if ! TMPFILE=$(mktemp /tmp/showjob.XXXXXX)
then
	echo "ERROR: could not create a temporary file in /tmp"
	exit 1
fi
# Remove the scratch file on every exit path
trap 'rm -f -- "$TMPFILE"' EXIT
# Check the showuserlimits command.
# command -v is a shell builtin and prints nothing when the command is missing.
showuserlimits=$(command -v showuserlimits)
if [[ -z $showuserlimits ]]
then
	echo "Did not find the showuserlimits command, please install it."
	# Fallback: print the would-be command line instead of running it.
	# This value holds two words, so $showuserlimits must be expanded unquoted.
	showuserlimits="echo showuserlimits"
fi

###########################################################################
#
# Get Slurm queue information
#

# Convert a squeue time limit (format %l) into whole minutes and print it.
# Arguments: $1 - time limit string as printed by squeue
# Outputs:   number of minutes on stdout
# squeue omits leading zero fields, so the string may be
# days-hours:minutes:seconds, hours:minutes:seconds or minutes:seconds.
# Seconds are ignored; non-numeric values (UNLIMITED, NOT_SET) give 0.
timelimit_to_minutes()
{
	awk -v limit="$1" 'BEGIN {
		days = 0
		hms = limit
		dash = index(limit, "-")
		if (dash > 0) {
			# A days field is present in front of the clock part
			days = substr(limit, 1, dash - 1)
			hms = substr(limit, dash + 1)
		}
		n = split(hms, t, ":")
		if (n == 2)
			mins = t[1]		# minutes:seconds
		else
			mins = t[1]*60 + t[2]	# hours:minutes[:seconds]
		print days*24*60 + mins
	}'
}

# Check if job is in the queue and squeue displays job information
if jobfound=$(squeue -ho "%i" -j "$jobid" 2>/dev/null) && [[ -n $jobfound ]]
then
	# Print basic job information
	#
	# Get required job information from a single squeue command.
	# The line is split with read (not eval), so shell metacharacters in the
	# squeue output, e.g. parentheses in the dependency field, are taken literally.
	# Note: Time %V may be modified by SLURM_TIME_FORMAT
	# Note: Dependency %E may be a null string and must be last;
	#       the last variable of read receives the rest of the line.
	# tail keeps the last record if squeue prints several lines.
	read -r jobuser jobaccount partition jobstate NCPUS timelimit reason submittime dependency \
		< <(squeue -ho "%u %a %P %T %C %l %r %V %E" -j "$jobid" | tail -n 1)
	# Get time limit as a number of minutes
	minutes=$(timelimit_to_minutes "$timelimit")
	echo "Job $jobid was submitted by user $jobuser in account $jobaccount on $submittime"
	echo "This job requests $NCPUS CPUs and has a time limit of $timelimit (days-hh:mm:ss) = $minutes min."
	echo "The job has state=$jobstate with reason=$reason"
	echo

	# Display job reason code (if other than "None")
	if [[ $reason != "None" ]]
	then
		# $showuserlimits is expanded unquoted on purpose: it may hold the
		# two-word fallback "echo showuserlimits".
		case "$reason" in
		AssocGrpCPURunMinutesLimit)
			echo "Information about $reason:"
			JobValue=$((NCPUS*minutes))
			echo "This job requires TRESRunMins:   cpu=$JobValue"
			echo
			echo "Information about Account and User limits:"
			$showuserlimits -u "$jobuser" -A "$jobaccount" -p "$partition" -l GrpTRESRunMins -s cpu
			;;
		AssocGrpCPUMinutesLimit)
			echo "Information about $reason:"
			echo "This job requires TRESMins:    cpu=$((NCPUS*minutes))"
			echo
			echo "Information about Account and User limits:"
			$showuserlimits -u "$jobuser" -A "$jobaccount" -p "$partition" -l GrpTRESMins -s cpu
			;;
		AssocGrpCpuLimit)
			echo "Information about $reason:"
			printf '%s' "Current user TRES is:      "
			# Count number of CPUs for running jobs (0 when there are none)
			squeue -ho "%C" -u "$jobuser" -t running | awk '{ncpus+=$1}END{print "cpu=" (ncpus+0)}'
			# NCPUS was already fetched by the squeue call above
			echo "This job requires TRES:    cpu=$NCPUS"
			echo
			echo "Information about Account and User limits:"
			$showuserlimits -u "$jobuser" -A "$jobaccount" -p "$partition" -l GrpTRES -s cpu
			;;
		AssocGrpGRES)
			echo "Information about $reason:"
			gres=$(squeue -ho "%b" -j "$jobid")
			# The GRES type is the text before the first colon
			grestype=${gres%%:*}
			echo "This job requires GRES: $gres"
			echo
			echo "Information about Account and User limits:"
			$showuserlimits -u "$jobuser" -A "$jobaccount" -p "$partition" -l GrpTRES -s "$grestype"
			;;
		AssocMaxJobsLimit)
			echo "Information about $reason:"
			running_jobs=$(squeue -ho "%A" -u "$jobuser" -t running | wc -l)
			echo "User $jobuser number of running jobs is: $running_jobs"
			echo "Information about Account and User limits:"
			$showuserlimits -u "$jobuser" -A "$jobaccount" -p "$partition" -l MaxJobs
			;;
		Dependency)
			echo "Dependency information: $dependency"
			;;
		esac
		echo
	fi
	# Job found in the queue
	echo "Queued job information:"
	scontrol --details show job "$jobid" | tee "$TMPFILE"
	# Parse the job status information (read E-mail).
	# Only the first Command= line is used.
	jobscript=$(grep -m 1 Command= "$TMPFILE" | sed 's/^   Command=//')
	export jobscript
	script_file=$jobscript
	if [[ -n $script_file && ! -f $script_file ]]
	then
		# NOTE(review): Command= may carry script arguments after the
		# path (confirm); fall back to the first word in that case.
		script_file=${jobscript%% *}
	fi
	# Skip the search when there is no readable script file: an empty file
	# name would make grep wait for input on stdin.
	if [[ $jobscript != "(null)" && -f $script_file && -r $script_file ]]
	then
		grep -- "mail-user" "$script_file" > "$TMPFILE"
		if [[ -s $TMPFILE ]]
		then
			echo "Job script E-mail has been set:"
			cat -- "$TMPFILE"
		fi
	fi
	rm -f -- "$TMPFILE"
else
	echo "The job $jobid is not in the current Slurm queue."
fi

###########################################################################
# Execute sacct and print nicely formatted output

# Run sacct with the given options and print its output as a table with
# right-aligned columns.
# Arguments: "$@" - options handed unchanged to sacct (-P is added here)
# Outputs:   header line, a line of dashes, then one line per sacct record;
#            every column is followed by a single blank
# The awk program uses only POSIX awk features, so gawk is not required.
function format_sacct()
{
	# Add -P flag for parseable output with fields separated by "|"
	sacct -P "$@" | awk -F "|" '
# Left-pad string s with blanks up to width w
function pad(s, w) {
	while (length(s) < w + 0) s = " " s
	return s
}
# Return a string of n dashes
function dashes(n,    s) {
	s = ""
	while (length(s) < n + 0) s = s "-"
	return s
}
{
	# Store every field and track the widest entry of each column
	for (i = 1; i <= NF; i++) {
		column[i, NR] = $i
		len = length($i)
		if (len > colwidth[i]) colwidth[i] = len	# Column width
	}
	cols = NF
	lines = NR
}
END {
	# Print a header (first sacct line)
	for (i = 1; i <= cols; i++)
		printf("%s ", pad(column[i, 1], colwidth[i]))
	printf("\n")
	# Separator as wide as each column
	for (i = 1; i <= cols; i++)
		printf("%s ", dashes(colwidth[i]))
	printf("\n")
	# Print lines
	for (l = 2; l <= lines; l++) {
		for (i = 1; i <= cols; i++)
			printf("%s ", pad(column[i, l], colwidth[i]))
		printf("\n")
	}
}'

}

###########################################################################
# Get job accounting information

# Section banner: blank line, title, blank line
printf '\n%s\n\n' "Accounting information from the Slurm database:"

# First table: jobid, jobname, user, account, partition and time limit
printf '%s\n' "Job parameters for jobid $jobid:"
jobvars="jobid,jobname,user,account,partition,Timelimit"
export jobvars
format_sacct -j "$jobid" -o "$jobvars"

# Second table: start/elapsed/end times, CPU time, node and CPU counts,
# exit code and node list. jobvars stays exported after reassignment.
printf '\n%s\n' "Job details information for jobid $jobid:"
jobvars="jobid,Start,elapsed,End,CPUTime,NNodes,NCPUS,ExitCode,nodelist"
format_sacct -j "$jobid" -o "$jobvars"
